* Working directory holding the replication data set and receiving all exported graphs and tables.
cd "C:\Users\David\Dropbox\Documents\Work\Clients & prospects\GiveWell\Criminal justice\Replications"

/*
odbc load, dsn(Crime) clear exec("select * from [Lofstrom & Raphael replication]")
saveold "Lofstrom & Raphael", version(12) replace
#delimit cr */

* Monthly state-level data; one row per PostalCode x Year x Month.
use "Lofstrom & Raphael", clear

* Outcome list shared by the loops below: aggregates first, then the individual offences.
global crimes Violent ViolentNonRape Property Murder Rape Robbery Assault Burglary Larceny MVTheft

* Numeric panel id (labelled with the postal code); remember which id is California.
egen id = group(PostalCode), label
levelsof id if PostalCode=="CA"
global treatId `r(levels)'
gen byte CA = PostalCode=="CA"

* Monthly time index and panel declaration.
gen tm = ym(Year, Month)
format tm %tm
tsset id tm
gen byte post = tm >= tm(2011m10) // 1 from October 2011 onward

* Aggregate crime counts.
replace Rape = . if Year>=2013 // definition change
gen long Violent = Murder + Rape + Assault + Robbery
gen long ViolentNonRape = Murder + Assault + Robbery
gen long Property = Burglary + Larceny + MVTheft

* Inmate counts in thousands. After the division these are fractional, so the total
* must be stored as double: an integer type (long) would truncate it to whole thousands.
replace PopJail = PopJail/1000
replace PopPrison = PopPrison/1000
gen double Inmates = PopPrison + PopJail
recode Inmates (. = 0) if Year>=2006 // missing totals from 2006 on are set to zero

* crime reporting rates, 2009-14, in % -- but ended up just using average post (2012-13) ratios for stability
* Each matrix is a row vector indexed by Year-2008, so column 1 is 2009.
mat MurderReportRate   = 100, 100, 100, 100, 100, 100
mat RapeReportRate     = 50.0, 27, 28.2, 34.8, 33.6 // five entries only (through 2013); Rape is blanked from 2013 on earlier in this script
mat AssaultReportRate  = 60.1, 60, 67, 62.4, 64.3, 58.4 // *Aggravated* assault
mat RobberyReportRate  = 57.9, 58, 66, 55.9, 68, 60.9
mat BurglaryReportRate = 58.8, 58, 52, 54.8, 57.3, 60.0
mat LarcenyReportRate  = 31.9, 32, 30, 26.4, 28.6, 29.0
mat MVTheftReportRate  = 83.4, 83, 83, 78.6, 75.5, 83.3
* Sources: BJS, Criminal Victimization, various years

* Scale reported counts up to estimated victimizations: count / (reporting rate / 100).
* Only the individual offences have a reporting-rate matrix, so all three aggregates are
* skipped here (the annual version of this loop in PrepAnnual skips the same three).
foreach crime in $crimes {
	if !inlist("`crime'", "Violent", "ViolentNonRape", "Property") {
		cap noi gen double `crime'Vict = `crime' / (`crime'ReportRate[1, Year-2008] / 100) if Year>=2009 & Year<=2014
	}
}
* Aggregate victimization estimates. Murder enters unscaled (its reporting rate is 100%).
gen double ViolentVict  = Murder + RapeVict + RobberyVict + AssaultVict
gen double PropertyVict = BurglaryVict + MVTheftVict + LarcenyVict

* Monthly population: keep only the July 1 figures, interpolate (and extrapolate) linearly
* between them within state, then average adjacent months to move from start-of-month to mid-month.
tempvar julyLine
replace Pop = . if Month != 7 // every population figure refers to July 1
by id: ipolate Pop tm, generate(`julyLine') epolate
replace Pop = (`julyLine' + F.`julyLine') / 2 // centre on mid-month
drop `julyLine'
replace Pop = L.Pop + LD.Pop if missing(Pop) // final month has no lead: carry the last monthly change forward

* Inmates are in thousands, so this is inmates per 100,000 residents.
gen double InmatesPop = (Inmates / Pop) * 1e8

* Absolute difference between California's rate in 2011m9 and in 2012m3.
levelsof InmatesPop if CA & inlist(tm, tm(2011m9), tm(2012m3))
local rateLevels `r(levels)'
local rateA : word 1 of `rateLevels'
local rateB : word 2 of `rateLevels'
scalar InmatePopDrop = abs(`rateA' - `rateB')

* Annualized rates per 100,000 residents, and their logs, for every outcome in the crimes global.
* The victimization-based versions are best effort: capture lets the loop continue
* for outcomes that have no Vict variable.
foreach outcome of global crimes {
	generate double `outcome'Pop = `outcome'/Pop * 1e5 * 12 // monthly count -> annual rate
	generate double l`outcome'Pop = ln(`outcome'Pop)
	capture noisily generate double `outcome'VictPop = `outcome'Vict/Pop * 1e5 * 12 // monthly count -> annual rate
	capture noisily generate double l`outcome'VictPop = ln(`outcome'VictPop)
}

* seasonally adjust by partialling out calendar month dummies for each state
* For each outcome: regress the log rate on state x calendar-month dummies (2000m1 onward),
* keep the residual, add the state effect back, and convert to a rate (PopSA) and a
* monthly count (SA). Then plot the adjusted California series in a graph named after the outcome.
foreach var in Burglary Larceny MVTheft Violent ViolentNonRape Property {
	gen l`var' = ln(`var') // log count; not referenced again in this script
	regress l`var'Pop i.id##i.Month if tm>=tm(2000m1), nocons
	predict double l`var'PopSA if e(sample), resid
	mat b = e(b)
	* b[1,id] reads e(b) by the numeric state id, so this assumes the id main effects
	* occupy the leading columns of e(b) in id order.
	replace l`var'PopSA = l`var'PopSA + b[1,id] // add back state fixed effect for presentation
	gen double `var'PopSA = exp(l`var'PopSA)
	gen double `var'SA = `var'PopSA * Pop / 100000 / 12 // undo the per-100,000 and x12 scaling

	* Colour name is gray: grey is not a Stata named colour, so the request was being ignored.
	tsline `var'PopSA if PostalCode=="CA" & tm>=tm(2000m1), tline(`=tm(2011m10)-.5', lcolor(gray)) ///
		scheme(s1color) legend(off) ///
		graphregion(margin(none)) tlabel(2000m1(24)2014m1, format(%tmMon-YY)) xtitle("") ///
		ylabel(, angle(hor) format(%10.0gc)) ///
		text(121000 `=tm(2011m10)-1.5' "October 1, 2011" "Reforms enter effect", color(gray) just(right) place(sw)) ///
		name(`var', replace)
}

* Quick-look plots of the seasonally adjusted log property crime rate for four comparison states.
* Each plot replaces the previous one in the default graph window; nothing is exported.
* Colour name is gray: grey is not a Stata named colour, so the request was being ignored.
local stateCodes NV AZ OR TX
local stateNames Nevada Arizona Oregon Texas
forvalues s = 1/4 {
	local code : word `s' of `stateCodes'
	local name : word `s' of `stateNames'
	tsline lPropertyPopSA if PostalCode=="`code'" & tm>tm(2000m1), title("`name'") ///
		tline(`=tm(2011m10)-.5', lcolor(gray)) legend(off) graphregion(margin(none)) ///
		tlabel(2000m1(24)2014m1, format(%tmMon-YY)) xtitle("") ytitle("") ylabel(, angle(hor) format(%4.0gc))
}

* California (blue) versus Nevada (red): seasonally adjusted log property crime rate from 2000m1.
* The y axis is in logs but labelled with the corresponding rate levels (2500 to 4500).
* Colour name is gray: grey is not a Stata named colour, so the request was being ignored.
tsline lPropertyPopSA if PostalCode=="CA", lcolor(blue) || tsline lPropertyPopSA if PostalCode=="NV", lcolor(red) || if tm>=tm(2000m1), tline(`=tm(2011m10)-.5', lcolor(gray)) ///
	scheme(s1color) ///
	legend(lab(1 "California") lab(2 "Nevada") ring(0) pos(8) region(lwidth(none))) plotregion(margin(none)) ///
	graphregion(margin(none)) tlabel(2000m1(24)2014m1, format(%tmMon-YY)) xtitle("") ///
	ylabel(`=ln(2500)' "2500" `=ln(3000)' "3000" `=ln(3500)' "3500" `=ln(4000)' "4000" `=ln(4500)' "4500", angle(hor) format(%10.0gc)) ytitle("") ///
	text(`=ln(4850)' `=tm(2011m10)-1.5' "October 1, 2011" "Reforms enter effect" "in California", color(gray) just(right) place(sw)) ///
	name(PropertyCANV, replace)
graph export "PropertyCANV.png", replace width(1000)

* graph seasonally adjusted violent and property crime for states
* For each listed state: property rate on the left axis (blue) and violent-excluding-rape
* rate on a second y axis (red), for months after 2000m1, with a vertical line half a month
* before 2011m10. Each graph is kept in memory under the state code and exported to PNG.
foreach state in CA NV AZ OR {
	tsline PropertyPopSA, plotregion(lwidth(none)) lcolor(blue) || tsline ViolentNonRapePopSA, plotregion(lwidth(none)) yaxis(2) lcolor(red) || if PostalCode=="`state'" & tm>tm(2000m1), tline(`=tm(2011m10)-.5', lcolor(cranberry)) ///
		scheme(s1color) legend(off) graphregion(margin(zero)) ///
		tlabel(2000m1(24)2014m1, format(%tmMonYY)) xtitle("") ///
		ytitle("Property crimes/100,000 residents", color(blue)) ytitle("Violent crimes/100,000 residents, excluding rape", color(red) axis(2) orient(rvert)) ///
		ylabel(, angle(hor) format(%10.0gc) labcolor(blue) tlcolor(blue)) ylabel(, axis(2) format(%10.0gc) angle(hor) labcolor(red) tlcolor(red)) ///
		text(121000 `=tm(2011m10)-2' "October 1, 2011" "Reforms enter effect", just(right) place(sw)) ///
		yscale(lcolor(blue)) yscale(lcolor(red) axis(2)) name(`state', replace)
	graph export "`state' violent & property crime by month.png", width(666) replace
}
* Stack the three comparison-state graphs in one column (shown on screen; not exported here).
graph combine OR NV AZ, cols(1)

* graph incarcerated population by month
* California only, 2011m1 through 2013m1. Jail, prison and total (all in thousands) are drawn
* as connected lines with each point labelled by its rounded value; the y axis itself is hidden.
gen int _PopJail = round(PopJail)
gen int _PopPrison = round(PopPrison)
gen int _Inmates = round(Inmates)
* Colour name is gray: grey is not a Stata named colour, so the request was being ignored.
* The text() options (colour, justification, placement) apply to all four annotations.
twoway connected PopJail PopPrison Inmates tm, mlab(_PopJail _PopPrison _Inmates) mlabpos(12...) mlabsize(*.8...) || if CA & Year>=2011 & tm<=tm(2013m1), ///
	tline(2011m9) yscale(range(0 .)) ylabel(none) tscale(range(2011m1 2013m2)) tlabel(2011m1(12)2013m1, format(%tmMon-YY)) yscale(off) ///
	legend(off) plotregion(lwidth(none)) graphregion(margin(7 0 0 3)) ///
	scheme(s1color) ///
	text(237 `=tm(2013m1)+.25' "Prison + jail" ///
	     157 `=tm(2013m1)+.25' "Prison" ///
	     105 `=tm(2013m1)+.25' "Jail" ///
	     17 `=tm(2011m10)-1.25' "October 1, 2011" "Reforms enter effect", color(gray) just(right) place(sw)) xtitle("")
graph export "CA inmate population by month.png", width(666) replace

* not used in text: Buonanno & Raphael-like treatment of post-realignment 6-month adjustment period
* ymonthrel counts months relative to October 2011 (0 in 2011m10, negative before).
gen ymonthrel = tm - tm(2011m10)
foreach crime in Property Violent {
	* Two-equation system on California, 2006m10 through 2012m3: the crime rate and the inmate
	* rate are each modelled as a quadratic in ymonthrel, fully interacted with post, plus
	* calendar-month dummies.
	sureg (`crime'Pop c.ymonthrel##c.ymonthrel##post i.Month) (InmatesPop c.ymonthrel##c.ymonthrel##post i.Month) if PostalCode=="CA" & tm>=tm(2006m10) & tm<=tm(2012m3), small isure
	* Negative ratio of (linear + quadratic x 13) post-period terms, crime equation over inmate equation.
	* NOTE(review): 2*6+1 equals 7^2 - 6^2, so this looks like the fitted one-month change
	* between months 6 and 7 after the reform -- confirm the intended evaluation point.
	nlcom -( [`crime'Pop]ymonthrel + [`crime'Pop]1.post#c.ymonthrel + ([`crime'Pop]c.ymonthrel#c.ymonthrel+ [`crime'Pop]c.ymonthrel#c.ymonthrel#1.post) * (2*6+1)) / ///
				 ( [InmatesPop]ymonthrel + [InmatesPop]1.post#c.ymonthrel + ([InmatesPop]c.ymonthrel#c.ymonthrel+ [InmatesPop]c.ymonthrel#c.ymonthrel#1.post) * (2*6+1))
}

***
*** Annual data, with data year beginning 10/1 so we don't have to drop calendar year 2011, the year containing the reform
***

* PrepAnnual [shift]
*   Reloads the monthly file and collapses it to a state-by-year panel, replacing the data in memory.
*   Argument: any non-empty first argument (the script passes the word shift) moves October-December
*             into the following Year, so data year Y runs from October of Y-1 through September of Y.
*   Leaves in memory: annual crime counts, Pop (average of the monthly Pop values), CA, post,
*             victimization-adjusted counts, rates per 100,000 and their logs, numeric id; tsset id Year.
*   Also (re)sets the globals crimes and treatId and the reporting-rate matrices.
cap program drop PrepAnnual
program define PrepAnnual
	global crimes Violent ViolentNonRape Property Murder Rape Robbery Assault Burglary Larceny MVTheft
	use "Lofstrom & Raphael", clear
	recode Rape* (* = .) if Year>=2013 // rape definition changed, so zap figures after 2013 (FBI annual data includes old-definition estimates, but not monthly data)
	gen long Violent = Murder + Rape + Assault + Robbery
	gen long ViolentNonRape = Murder + Assault + Robbery
	gen long Property = Burglary + Larceny + MVTheft
	gen byte CA = PostalCode=="CA"
	* post must be derived from the calendar year, so compute it before Year is shifted;
	* otherwise October-December 2010 (relabelled as Year 2011) would count as post-reform.
	gen byte post = ym(Year, Month) >= tm(2011m10)
	if "`1'"!="" replace Year = Year + 1 if Month>=10
	* CA and post become Pop-weighted means within year; crime counts and Pop are plain sums.
	collapse CA post (rawsum) $crimes Popsum=Pop [aw = Pop], by(Year PostalCode)
	gen long Pop = Popsum / 12 // assumes 12 monthly rows per state-year -- partial years at the ends of the sample are understated

	* crime reporting rates, 2009-14, in % -- but ended up just using average post (2012-13) ratios for stability
	* Each matrix is a row vector indexed by Year-2008, so column 1 is 2009.
	mat MurderReportRate   = 100, 100, 100, 100, 100, 100
	mat RapeReportRate     = 50.0, 27, 28.2, 34.8, 33.6
	mat AssaultReportRate  = 60.1, 60, 67, 62.4, 64.3, 58.4 // *Aggravated* assault
	mat RobberyReportRate  = 57.9, 58, 66, 55.9, 68, 60.9
	mat BurglaryReportRate = 58.8, 58, 52, 54.8, 57.3, 60.0
	mat LarcenyReportRate  = 31.9, 32, 30, 26.4, 28.6, 29.0
	mat MVTheftReportRate  = 83.4, 83, 83, 78.6, 75.5, 83.3
	* Sources: BJS, Criminal Victimization, various years

	* Estimated victimizations = reported count / (reporting rate / 100), individual offences only.
	foreach crime in $crimes {
		if !inlist("`crime'", "Violent", "ViolentNonRape", "Property") {
			cap noi gen double `crime'Vict = `crime' / (`crime'ReportRate[1, Year-2008] / 100) if Year>=2009 & Year<=2014
		}
	}
	gen double ViolentVict  = Murder + RapeVict + RobberyVict + AssaultVict
	gen double PropertyVict = BurglaryVict + MVTheftVict + LarcenyVict

	* Rates per 100,000 and logs, first for reported counts, then for the Vict versions.
	* capture skips outcomes that have no Vict variable.
	foreach Vict in "" Vict {
		foreach var of varlist $crimes {
			cap gen double `var'`Vict'Pop = `var'`Vict'/Pop * 1e5
			cap gen double l`var'`Vict'Pop = ln(`var'`Vict'Pop)
		}
	}

	* Numeric panel id; remember California's id for the synthetic control runs.
	egen id = group(PostalCode), label
	levelsof id if PostalCode=="CA"
	global treatId `r(levels)'
	tsset id Year
end


***
*** static panel regressions
***

* Annual panel on October-September data years, restricted to years through 2013.
PrepAnnual shift
keep if Year<=2013 // focus on short-term (2-year) impact
* Treatment dummy: California in data year 2012 only. The outcome below is a first
* difference, so this picks up the 2011-to-2012 change.
gen byte DT = PostalCode=="CA" & Year==2012
set seed 987654321
set scheme s1color
* Reset frmttable's stored tables before accumulating one column block per crime.
frmttable, clear(Results)
frmttable, clear
mat dcols = 0,0,1
foreach crime in Murder Rape Robbery Assault Burglary Larceny MVTheft {
	* Two passes: a first regression weighted by Pop supplies fitted values, and the
	* reported regression is then re-weighted by exp(fitted value) * Pop, clustering on state.
	qui regress D.l`crime'Pop DT i.Year [pw = Pop]
	cap drop yhat
	predict double yhat if e(sample)
	regress D.l`crime'Pop i.DT i.Year [pw = exp(yhat)*Pop], cluster(id)
	* Bootstrap test of the treatment coefficient with a 90% confidence set; the graph is
	* stored under the crime name for the combined figure below.
	boottest 1.DT, reps(9999) ptype(equal) level(90) gridpoints(50) graphname(`crime', replace) graphopt(title("`crime'") ytitle(`=cond(inlist("`crime'","Murder","Burglary"),"p value","")'))
	mat ci = r(CI)
	scalar p = r(p)
	sum `crime'Pop if PostalCode=="CA" & Year==2011, meanonly // California rate in data year 2011 (Oct 2010 - Sep 2011), the year before DT switches on
	scalar ReportRate = (`crime'ReportRate[1,2012-2008]+`crime'ReportRate[1,2013-2008])/200 // average reporting rate for 2012-13
	* Row 1: coefficient and confidence bounds. Rows 2-5: (exp(.)-1) times the 2011 rate,
	* scaled by 1, 1/InmatePopDrop, 1/ReportRate, and 1/(InmatePopDrop*ReportRate).
	* InmatePopDrop is the scalar computed in the monthly section of this script.
	mat b_ci =            _b[1.DT], ci
	mata st_matrix("b_ci",  st_matrix("b_ci") \ (1 \ 1/`=InmatePopDrop' \ 1/`=ReportRate' \ 1/`=InmatePopDrop'/`=ReportRate') * (exp(st_matrix("b_ci")):-1) * `r(mean)')
	* Significance stars (count of thresholds passed) on the point estimate only, repeated for all five rows.
	mat stars = (p < 0.01) + (p < 0.05) + (p < 0.1) , 0 
	mat stars = stars \ stars \ stars \ stars \ stars
	* Per-cell decimal places derived from each value's magnitude (2 - ceil(log10|x|), floored at 0).
	local sdec
	forvalues i=1/`=rowsof(b_ci)' {
		forvalues j=1/2 {
			if `i' > 1 | `j' > 1 local sdec `sdec' \
			local sdec `sdec' `=max(0,2-ceil(log10(abs(b_ci[`i',`j']))))'
		}
	}
	frmttable, statmat(b_ci) annotate(stars) asymbol(*,**,***) doubles(dcols) square dbldiv(", ") substat(1) sdec(`sdec') merge 
}
* Write the accumulated table and the combined bootstrap figure.
frmttable using "Lofstrom & Raphael panel", replace replay(Results) append store(Results) nocenter coljust(c{c}c)
graph combine Murder Rape Robbery Assault Burglary Larceny MVTheft, scheme(s1color) cols(4) graphregion(margin(zero))
graph export "Lofstrom & Raphael wild bootstrap panel.png", replace width(666)


***
*** replication of synthetic control comparisons with baseline = 2011 (meaning 10/1/10-10/1/11)
***

* closest match to original?
* Calendar-year panel (no October shift). For every outcome: fit a synthetic California on the
* outcome's own 2000-2010 values, compute the 2012-13 gap net of the 2010 gap, then repeat the
* fit with each other state as a placebo and rank California's estimate within that distribution.
PrepAnnual
keep if Year>=2000
global postYears 2012 2013
cap mat drop Res
foreach crime in $crimes {
	* Predictors: the outcome itself in each year 2000 through 2010.
	local preds
	forvalues y = 2000/2010 {
		local preds `preds' `crime'Pop(`y')
	}

	synth `crime'Pop `preds', trunit($treatId) trperiod(2012) ///
		keep("LofstromRaphaelReplicationSynth", replace) resultsperiod(2000(1)2010 $postYears)

	* Result rows follow resultsperiod: rows 1-11 are 2000-2010, row 12 is 2012, row 13 is 2013.
	mat Y_synthetic = e(Y_synthetic)
	mat Y_treated   = e(Y_treated)
	scalar Diff     = (Y_treated[12,1] + Y_treated[13,1])/2 - (Y_synthetic[12,1] + Y_synthetic[13,1])/2
	scalar DiffDiff = Diff - (Y_treated[11,1] - Y_synthetic[11,1]) // baseline = 2010

	* Quick visual check of treated versus synthetic series (not exported).
	preserve
	use "LofstromRaphaelReplicationSynth", clear
	line _Y_treated _Y_synthetic _time if _time>=2000, xline(2010) xlabel(2000/2013) legend(off) title("`crime'")
	restore

	* Placebo distribution: the same difference-in-differences with every other state as the treated unit.
	* NOTE(review): placebos use trperiod(2011) with an explicit mspeperiod, while the California
	* run above uses trperiod(2012) and the default -- confirm this asymmetry is intended.
	mata DiffDiffDist = J(0,1,0)
	levelsof id
	foreach i in `r(levels)' {
		if `i' != $treatId {
			synth `crime'Pop `preds', trunit(`i') trperiod(2011) ///
				mspeperiod(2000(1)2010) resultsperiod(2000(1)2010 $postYears)

			mat Y_synthetic = e(Y_synthetic)
			mat Y_treated   = e(Y_treated)
			mata DiffDiffDist = DiffDiffDist \ `= (Y_treated[12,1] + Y_treated[13,1])/2 - (Y_synthetic[12,1] + Y_synthetic[13,1])/2 - ((Y_treated[11,1])/1 - (Y_synthetic[11,1])/1)'
		}
	}

	* p1: share of placebo estimates above California's; p2: share larger in absolute value.
	mata st_numscalar("p1", mean(`=DiffDiff':<DiffDiffDist))
	mata st_numscalar("p2", mean(abs(`=DiffDiff'):<abs(DiffDiffDist)))
	mat Res = nullmat(Res), (DiffDiff \ p1 \ p2)
}
mat colnames Res = $crimes
mat rownames Res = Impact "One-tailed p" "Two-tailed p"
matlist Res


***
*** Revised synthetic control regressions
***

* Rebuild the annual panel on data years that start October 1, from 2000 onward.
PrepAnnual shift
drop if Year < 2000

* Outcomes for the revised runs, the pre-period used for the difference-in-differences,
* and the years on which the synthetic control is matched.
global crimes Violent Murder Rape Assault Robbery Property Burglary Larceny MVTheft
global BaseYears 2010 2011
global MatchYears 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011

* One predictor list per outcome: the outcome's own rate in each year 2000-2011.
foreach outcome of global crimes {
	local yearly
	forvalues yr = 2000/2011 {
		local yearly `yearly' `outcome'Pop(`yr')
	}
	global `outcome'Predictors `yearly'
}
* Pooled predictor lists shared by all violent, respectively all property, offences.
global cViolentPredictors $MurderPredictors $RapePredictors $RobberyPredictors $AssaultPredictors
global cPropertyPredictors $BurglaryPredictors $LarcenyPredictors $MVTheftPredictors

* For every state in turn as the treated unit, fit the aggregate outcome on the pooled predictors
* and store the diagonal of the resulting V matrix as a space-separated global
* (cViolentV<id>, cPropertyV<id>) for later use as custom weights.
levelsof id, local(unitIds)
foreach unit of local unitIds {
	foreach grp in Violent Property {
		synth `grp'Pop ${c`grp'Predictors}, trunit(`unit') trperiod(2012) resultsperiod(2000/2012)
		mata: st_global("c`grp'V`unit'", invtokens(strofreal(diagonal(st_matrix("e(V_matrix)"))')))
	}
}

* Main revised synthetic control runs. Two passes:
*   common == ""       each outcome is matched on its own 2000-2011 values;
*   common == "common" every violent (property) outcome is matched on the pooled violent (property)
*                      predictor list, with the V weights stored earlier in the c*V<id> globals.
* Each pass produces a 4 x #crimes result matrix (Res / Rescommon) and a state x crime
* matrix of pre-period RMSPEs (RMSPE / RMSPEcommon).
foreach common in "" common { // use common control for all violent/property crimes?
	mata Res = J(4,0,0)
	cap mat drop RMSPE
	foreach crime in $crimes {
		* Prefix selecting which predictor list and V global to use for this outcome.
		local ccrime = cond("`common'"=="", "`crime'", cond(inlist("`crime'","Property","Burglary","Larceny","MVTheft"), "cProperty", "cViolent"))

		* Post period is 2012-13, except for Rape and Violent, where only 2012 is used
		* (PrepAnnual blanks Rape from 2013 on).
		global PostYears = "2012 " + cond(inlist("`crime'","Rape","Violent"),"","2013")
		* Treated-minus-synthetic gap per state-year, refilled on each pass.
		cap drop `crime'PopGap
		gen `crime'PopGap = .

		mata MSPEratio = DiffTreatedDist = DiffControlDist = J(0,1,0)
		cap mat drop _RMSPE

		* Fit every state in turn as the treated unit; California's own fit is one entry in this
		* distribution and the others serve as placebos.
		levelsof id
		foreach id in `r(levels)' {
			synth `crime'Pop ${`ccrime'Predictors}, ///
				trunit(`id') trperiod(2012) mspeperiod($MatchYears) resultsperiod($MatchYears $PostYears) ///
				`=cond("`common'"=="","","customV(${`ccrime'V`id'})")' ///
				keep("LofstromRaphaelReplicationSynth", replace)
			mat _RMSPE = nullmat(_RMSPE) \ e(RMSPE)

			* For California, export the treated-versus-synthetic plot.
			if `id'==$treatId {
				preserve
				use "LofstromRaphaelReplicationSynth", clear
				line _Y_treated _Y_synthetic _time if _time>=2000, xline(2011.5) xlabel(2000/2013) ylabel(, angle(hor)) xtitle("") lcolor(black gray) lwidth(medthick) ///
					legend(lab(1 "California") lab(2 "Synthetic California") ring(0) pos(8) region(lwidth(none))) plotregion(margin(none)) ///
					name(`crime'`common', replace)
				graph export "Lofstrom & Raphael `crime' `common'.png", replace width(666)
				restore
			}

			* Copy the gap series into the panel, matching result rows to years by row name.
			mat gap = e(Y_treated) - e(Y_synthetic)
			forvalues y=1/`=rowsof(gap)' {
				replace `crime'PopGap = gap[`y',1] if id==`id' & Year == `:word `y' of `:rownames gap''
			}
			* Result rows start at 2000, so year minus 1999 is the row index.
			* Change from base years to post years for the treated unit and for its synthetic control,
			* and the ratio of post-period to match-period mean squared gap.
			mata DiffTreatedDist = DiffTreatedDist \ mean(st_matrix("e(Y_treated)"  )[strtoreal(tokens("$PostYears")):-1999]) - mean(st_matrix("e(Y_treated)"  )[strtoreal(tokens("$BaseYears")):-1999])
			mata DiffControlDist = DiffControlDist \ mean(st_matrix("e(Y_synthetic)")[strtoreal(tokens("$PostYears")):-1999]) - mean(st_matrix("e(Y_synthetic)")[strtoreal(tokens("$BaseYears")):-1999])
			mata MSPEratio = MSPEratio \ mean((st_matrix("e(Y_synthetic)")-st_matrix("e(Y_treated)"))[strtoreal(tokens("$PostYears")):-1999]:^2) / mean((st_matrix("e(Y_synthetic)")-st_matrix("e(Y_treated)"))[strtoreal(tokens("$MatchYears")):-1999]:^2)
		}

		* Entries are in id order, so California's is entry number treatId.
		mata DiffDiffDist = DiffTreatedDist - DiffControlDist
		mata DiffDiff = DiffDiffDist[$treatId]
		mata Res = Res, (DiffDiff \ (sum(DiffDiff :<DiffDiffDist)+.5)/rows(DiffDiffDist) \ (sum(abs(DiffDiff):<abs(DiffDiffDist))+.5)/rows(DiffDiffDist) \ (sum(MSPEratio[$treatId]:<MSPEratio)+.5) /rows(MSPEratio)) // p values based on Diff-diff, 1 and 2 tailed, and MSPE ratio
		mat RMSPE = nullmat(RMSPE), _RMSPE

		* Gap plot: all states as thin gray lines (cmissing(n) breaks the line at missing values),
		* California overlaid in black. The in-memory graph name does not include the common
		* suffix, but the exported file name does.
		line `crime'PopGap Year if Year>=2000 & Year<=2014, cmissing(n) lcolor(gray) lwidth(vvvthin) || ///
			line `crime'PopGap Year if PostalCode=="CA" & Year>=2000 & Year<=2014, ///
			legend(lab(1 "Other states") lab(2 "California") order(2 1) ring(0) pos(8) region(lwidth(none))) ///
			lcolor(black) lwidth(medthick) xline(2011) xtitle("") ytitle("") xlabel(2000/2013) ylabel(, angle(hor)) ///
			text(300 2011.1 "October 1, 2011:" "reforms" "enter force" "in California", place(se) just(left)) plotregion(margin(none)) ///
			name(`crime'Gap, replace)
		graph export "Lofstrom & Raphael `crime' `common' gap.png", replace width(666)
	}
	* Label and keep this pass's RMSPE and result matrices under a pass-specific name.
	mat RMSPE`common' = RMSPE
	levelsof PostalCode
	mat rownames RMSPE`common' = `r(levels)'
	mat colnames RMSPE`common' = $crimes
	matlist RMSPE`common'
	mata st_matrix("Res`common'", Res)
	mat colnames Res`common' = $crimes
	mat rownames Res`common' = Impact "One-tailed diff-diff p" "Two-tailed diff-diff p" "MSPE ratio p"
}
* Write both result matrices to the same output document: own-predictor results first,
* then the common-predictor results added as a second table.
frmttable using "Lofstrom & Raphael synth", statmat(Res)       replace  nocenter coljust(c{c}c)      // <--- basis for revised results in report table
frmttable using "Lofstrom & Raphael synth", statmat(Rescommon) addtable nocenter coljust(c{c}c) // <--- basis for revised results in report table, common benchmark within property & violent crime

* rerun property syntheses to display weights cited in text
* First with property crime's own predictors, then with the pooled property predictors (no customV here).
* PostYears still holds the value set for the last outcome in the loop above (MVTheft, so 2012 2013).
synth PropertyPop  $PropertyPredictors, trunit($treatId) trperiod(2012) resultsperiod($MatchYears $PostYears)
synth PropertyPop $cPropertyPredictors, trunit($treatId) trperiod(2012) resultsperiod($MatchYears $PostYears)
